library(ggplot2)
library(nortest)
# Load the movies dataset (relative path) and list the columns it contains.
movies <- read.csv(file = "../datos/movies_2026.csv")
names(movies)
 [1] "id"                        "budget"                   
 [3] "genres"                    "homePage"                 
 [5] "productionCompany"         "productionCompanyCountry" 
 [7] "productionCountry"         "revenue"                  
 [9] "runtime"                   "video"                    
[11] "director"                  "actors"                   
[13] "actorsPopularity"          "actorsCharacter"          
[15] "originalTitle"             "title"                    
[17] "originalLanguage"          "popularity"               
[19] "releaseDate"               "voteAvg"                  
[21] "voteCount"                 "genresAmount"             
[23] "productionCoAmount"        "productionCountriesAmount"
[25] "actorsAmount"              "castWomenAmount"          
[27] "castMenAmount"             "releaseYear"              

Ejercicio 1

# Names of the columns treated as quantitative (numeric) variables.
numericas <- c(
  "budget", "revenue", "runtime",
  "genresAmount", "productionCoAmount", "productionCountriesAmount",
  "actorsAmount", "castWomenAmount", "castMenAmount",
  "releaseYear", "popularity", "voteCount", "voteAvg"
)

# Keep only those columns and confirm which ones were selected.
movies_num <- movies[, numericas]
names(movies_num)
 [1] "budget"                    "revenue"                  
 [3] "runtime"                   "genresAmount"             
 [5] "productionCoAmount"        "productionCountriesAmount"
 [7] "actorsAmount"              "castWomenAmount"          
 [9] "castMenAmount"             "releaseYear"              
[11] "popularity"                "voteCount"                
[13] "voteAvg"                  
# Every column that is neither in `numericas` nor the "id" column is treated
# as a categorical variable.
movies_cat <- movies[, setdiff(names(movies), c(numericas, "id"))]
names(movies_cat)
 [1] "genres"                   "homePage"                 "productionCompany"       
 [4] "productionCompanyCountry" "productionCountry"        "video"                   
 [7] "director"                 "actors"                   "actorsPopularity"        
[10] "actorsCharacter"          "originalTitle"            "title"                   
[13] "originalLanguage"         "releaseDate"             

El conjunto de datos está formado por 13 variables cuantitativas y 14 variables cualitativas.

# Per-column descriptive statistics for the numeric variables: minimum,
# quartiles, median, mean, maximum and the number of NA values where present.
summary(movies_num)
     budget             revenue             runtime        genresAmount   
 Min.   :        0   Min.   :0.000e+00   Min.   :  0.00   Min.   : 0.000  
 1st Qu.:        0   1st Qu.:0.000e+00   1st Qu.: 10.00   1st Qu.: 1.000  
 Median :        0   Median :0.000e+00   Median : 86.00   Median : 2.000  
 Mean   :  9413280   Mean   :2.879e+07   Mean   : 66.09   Mean   : 1.949  
 3rd Qu.:  1000000   3rd Qu.:3.306e+05   3rd Qu.:103.00   3rd Qu.: 3.000  
 Max.   :380000000   Max.   :2.847e+09   Max.   :750.00   Max.   :16.000  
                                                                          
 productionCoAmount productionCountriesAmount  actorsAmount    castWomenAmount 
 Min.   : 0.000     Min.   :  0.00            Min.   :     0   Min.   :     0  
 1st Qu.: 0.000     1st Qu.:  1.00            1st Qu.:     3   1st Qu.:     0  
 Median : 1.000     Median :  1.00            Median :     9   Median :     2  
 Mean   : 1.973     Mean   :  1.23            Mean   :  1082   Mean   :  3517  
 3rd Qu.: 3.000     3rd Qu.:  1.00            3rd Qu.:    21   3rd Qu.:     6  
 Max.   :89.000     Max.   :155.00            Max.   :919590   Max.   :922162  
                                                               NA's   :37      
 castMenAmount     releaseYear     popularity          voteCount      
 Min.   :     0   Min.   :1902   Min.   :0.000e+00   Min.   :    0.0  
 1st Qu.:     0   1st Qu.:2013   1st Qu.:5.460e-02   1st Qu.:    0.0  
 Median :     3   Median :2021   Median :8.502e+00   Median :    6.0  
 Mean   :  8224   Mean   :2017   Mean   :2.625e+01   Mean   :  675.9  
 3rd Qu.:    12   3rd Qu.:2025   3rd Qu.:2.224e+01   3rd Qu.:  423.0  
 Max.   :922017   Max.   :2026   Max.   :1.147e+04   Max.   :30788.0  
 NA's   :162      NA's   :2                                           
    voteAvg      
 Min.   : 0.000  
 1st Qu.: 0.000  
 Median : 5.400  
 Mean   : 3.837  
 3rd Qu.: 6.800  
 Max.   :10.000  
                 

Ejercicio 2

Este ejercicio se realizó en una tabla en el documento de entrega del laboratorio.

Ejercicio 3

# Normality check for `budget`: histogram and boxplot for a visual look at the
# distribution, then the Lilliefors (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$budget)

boxplot(movies_num$budget)

lillie.test(movies_num$budget)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$budget
D = 0.37204, p-value < 2.2e-16

La variable “budget” no sigue una distribución normal.

# Normality check for `revenue`: histogram, boxplot, then the Lilliefors
# (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$revenue)

boxplot(movies_num$revenue)

lillie.test(movies_num$revenue)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$revenue
D = 0.39765, p-value < 2.2e-16
# Normality check for `runtime`: histogram, boxplot, then the Lilliefors
# (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$runtime)

boxplot(movies_num$runtime)

lillie.test(movies_num$runtime)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$runtime
D = 0.1629, p-value < 2.2e-16
# Normality check for `genresAmount`: histogram, boxplot, then the Lilliefors
# (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$genresAmount)

boxplot(movies_num$genresAmount)

lillie.test(movies_num$genresAmount)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$genresAmount
D = 0.19266, p-value < 2.2e-16
# Normality check for `productionCoAmount`: histogram, boxplot, then the
# Lilliefors (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$productionCoAmount)

boxplot(movies_num$productionCoAmount)

lillie.test(movies_num$productionCoAmount)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$productionCoAmount
D = 0.20818, p-value < 2.2e-16
# Normality check for `productionCountriesAmount`: histogram, boxplot, then
# the Lilliefors (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$productionCountriesAmount)

boxplot(movies_num$productionCountriesAmount)

lillie.test(movies_num$productionCountriesAmount)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$productionCountriesAmount
D = 0.3698, p-value < 2.2e-16
# Normality check for `actorsAmount`: histogram, boxplot, then the Lilliefors
# (Kolmogorov-Smirnov) test from nortest.
# NOTE(review): summary(movies_num) shows a maximum of 919590 against a third
# quartile of 21 -- possibly bad records; verify before interpreting the plots.
hist(movies_num$actorsAmount)

boxplot(movies_num$actorsAmount)

lillie.test(movies_num$actorsAmount)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$actorsAmount
D = 0.51124, p-value < 2.2e-16
# Normality check for `castWomenAmount`: histogram, boxplot, then the
# Lilliefors (Kolmogorov-Smirnov) test from nortest.
# NOTE(review): summary(movies_num) reports 37 NA's and a maximum of 922162
# (third quartile 6) for this column -- confirm the NAs are dropped by
# lillie.test as expected and check the extreme values.
hist(movies_num$castWomenAmount)

boxplot(movies_num$castWomenAmount)

lillie.test(movies_num$castWomenAmount)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$castWomenAmount
D = 0.52228, p-value < 2.2e-16
# Normality check for `castMenAmount`: histogram, boxplot, then the
# Lilliefors (Kolmogorov-Smirnov) test from nortest.
# NOTE(review): summary(movies_num) reports 162 NA's and a maximum of 922017
# (third quartile 12) for this column -- confirm the NAs are dropped by
# lillie.test as expected and check the extreme values.
hist(movies_num$castMenAmount)

boxplot(movies_num$castMenAmount)

lillie.test(movies_num$castMenAmount)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$castMenAmount
D = 0.52698, p-value < 2.2e-16
# Normality check for `releaseYear`: histogram, boxplot, then the Lilliefors
# (Kolmogorov-Smirnov) test from nortest.
# NOTE(review): summary(movies_num) reports 2 NA's for this column -- confirm
# they are dropped by lillie.test as expected.
hist(movies_num$releaseYear)

boxplot(movies_num$releaseYear)

lillie.test(movies_num$releaseYear)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$releaseYear
D = 0.23746, p-value < 2.2e-16
# Normality check for `popularity`: histogram, boxplot, then the Lilliefors
# (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$popularity)

boxplot(movies_num$popularity)

lillie.test(movies_num$popularity)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$popularity
D = 0.43322, p-value < 2.2e-16
# Normality check for `voteCount`: histogram, boxplot, then the Lilliefors
# (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$voteCount)

boxplot(movies_num$voteCount)

lillie.test(movies_num$voteCount)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$voteCount
D = 0.36364, p-value < 2.2e-16
# Normality check for `voteAvg`: histogram, boxplot, then the Lilliefors
# (Kolmogorov-Smirnov) test from nortest.
hist(movies_num$voteAvg)

boxplot(movies_num$voteAvg)

lillie.test(movies_num$voteAvg)

    Lilliefors (Kolmogorov-Smirnov) normality test

data:  movies_num$voteAvg
D = 0.28937, p-value < 2.2e-16
# Frequency table: number of movies per original-language code.
table(movies[["originalLanguage"]])

   ab    af    am    ar    as    az    be    bg    bn    bs    ca    cn    cs 
    1     4    15   106     2    17     1     5    45     6    36   100    36 
   cy    da    de    dv    el    en    es    et    eu    fa    fi    fr    ga 
    2    94   461     3    39 11961  1238    36     3    70    45  1094     4 
   gl    gu    he    hi    hr    ht    hu    hy    id    ig    is    it    ja 
    3     6    17   100    17     1    29     6   173     1     7   302   868 
   jv    ka    kk    km    kn    ko    ku    ky    la    lb    lt    lv    mk 
    5    12    11    14    30   336    10     6     1     2    34    24     4 
   ml    mn    mo    mr    ms    mt    my    nb    ne    nl    no    or    pa 
   59     2     1    20    17     2     5     7     5   192    54     2     7 
   pl    pt    qu    ro    ru    rw    se    si    sk    sl    sn    so    sq 
   80   628     1    25   190     2     3     3     8     9     1     1    10 
   sr    sv    sw    ta    te    th    tl    tr    uk    ur    uz    vi    xh 
   23   133     1    74    57    59   110   106    43     8     4    29     1 
   xx    zh    zu 
   92   365     1 
# Frequency table: number of movies per value of the `video` flag.
table(movies[["video"]])

FALSE  TRUE 
19313    84 

Ejercicio 4.1

# Rank all movies by budget, highest first, keeping only title and budget;
# then show the ten with the largest budget.
top_budget <- movies[
  order(movies[["budget"]], decreasing = TRUE),
  c("originalTitle", "budget")
]
head(top_budget, n = 10)

Ejercicio 4.2

# Rank all movies by revenue, highest first, keeping only title and revenue;
# then show the ten with the largest revenue.
top_revenue <- movies[
  order(movies[["revenue"]], decreasing = TRUE),
  c("originalTitle", "revenue")
]
head(top_revenue, n = 10)

Ejercicio 4.3

# Movie with the highest vote count. which.max() returns the first position
# of the maximum, so a tie would report only the earliest row.
mas_votos <- movies[
  which.max(movies[["voteCount"]]),
  c("originalTitle", "voteCount")
]
print(mas_votos)
      originalTitle voteCount
13402     Inception     30788

Ejercicio 4.4

# Worst-rated movie according to user votes.
# A voteAvg of 0 on a movie with no votes means "never rated", not "rated
# badly", so the search is restricted to movies with at least one vote.
# which() also drops rows whose voteCount is NA. On ties, which.min() keeps
# the first row.
# NOTE: re-render the notebook so the printed result reflects this filter.
peor_pelicula <- local({
  votadas <- movies[which(movies$voteCount > 0), ]
  votadas[which.min(votadas$voteAvg), c("originalTitle", "voteAvg")]
})
print(peor_pelicula)
  originalTitle voteAvg
1       غوطه ور       0

Ejercicio 4.5

# How many movies were released in each year?
peliculas_por_anio <- table(movies[["releaseYear"]])
df_peliculas_anio <- setNames(
  as.data.frame(peliculas_por_anio),
  c("Anio", "Cantidad")
)

# Which year had the most releases? which.max() keeps the first year on ties.
anio_mas_productivo <- df_peliculas_anio[
  which.max(df_peliculas_anio[["Cantidad"]]),
]
print(paste(
  "El año con más películas fue:", anio_mas_productivo[["Anio"]],
  "con", anio_mas_productivo[["Cantidad"]], "películas."
))
[1] "El año con más películas fue: 2025 con 7351 películas."
# Bar chart of the number of movies per release year.
# `Anio` is a factor (it comes from a table), so convert its labels to numbers
# to get a continuous x axis.
df_peliculas_anio[["AnioNum"]] <- as.numeric(
  as.character(df_peliculas_anio[["Anio"]])
)
ggplot(data = df_peliculas_anio, mapping = aes(x = AnioNum, y = Cantidad)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Cantidad de Películas por Año",
    x = "Año de Lanzamiento",
    y = "Número de Películas"
  ) +
  theme_minimal()

Ejercicio 4.6


# Convert the release date column to Date values so that ordering is
# chronological (presumably it was read from the CSV as text -- verify).
movies[["releaseDate"]] <- as.Date(movies[["releaseDate"]])

# Newest first, keeping only title and date; then take the 20 most recent.
peliculas_recientes <- movies[
  order(movies[["releaseDate"]], decreasing = TRUE),
  c("originalTitle", "releaseDate")
]
v20_peliculas_recientes <- head(peliculas_recientes, n = 20)
v20_peliculas_recientes
LS0tCnRpdGxlOiAiTGFib3JhdG9yaW8gMSIKYXV0b3I6ICJKYXZpZXIgQ2jDoXZleiwgR2Vuc2VyIENhdGFsw6FuLCBTYW11ZWwgTWVqw61hIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQpgYGAKCmBgYHtyIHBhcXVldGVzfQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkobm9ydGVzdCkKYGBgCgpgYGB7cn0KbW92aWVzPC0gcmVhZC5jc3YoIi4uL2RhdG9zL21vdmllc18yMDI2LmNzdiIpCmNvbG5hbWVzKG1vdmllcykKYGBgCgojIyMgRWplcmNpY2lvIDEKCmBgYHtyIHZhcmlhYmxlcyBudW3DqXJpY2FzfQpudW1lcmljYXMgPC0gYygiYnVkZ2V0IiwicmV2ZW51ZSIsInJ1bnRpbWUiLCJnZW5yZXNBbW91bnQiLCJwcm9kdWN0aW9uQ29BbW91bnQiLCJwcm9kdWN0aW9uQ291bnRyaWVzQW1vdW50IiwiYWN0b3JzQW1vdW50IiwiY2FzdFdvbWVuQW1vdW50IiwKImNhc3RNZW5BbW91bnQiLCJyZWxlYXNlWWVhciIsICJwb3B1bGFyaXR5IiwgInZvdGVDb3VudCIsICJ2b3RlQXZnIikgI1NlbGVjY2lvbmFyIHNvbG8gbnVtw6lyaWNhcwptb3ZpZXNfbnVtIDwtIG1vdmllc1ssIG51bWVyaWNhc10KY29sbmFtZXMobW92aWVzX251bSkKYGBgCgpgYGB7ciB2YXJpYWJsZXMgY2F0ZWfDs3JpY2FzfQptb3ZpZXNfY2F0IDwtIG1vdmllc1ssICFuYW1lcyhtb3ZpZXMpICVpbiUgYyhudW1lcmljYXMsICJpZCIpXSAjU2VsZWNjaW9uYXIgc29sbyBjYXRlZ8OzcmljYXMKbmFtZXMobW92aWVzX2NhdCkKYGBgCgpFbCBjb25qdW50byBkZSBkYXRvcyBlc3TDoSBmb3JtYWRvIHBvciAnYHIgbmNvbChtb3ZpZXNfbnVtKWAgdmFyaWFibGVzIGN1YW50aXRhdGl2YXMgeSAnYHIgbmNvbChtb3ZpZXNfY2F0KWAgdmFyaWFibGVzIGN1YWxpdGF0aXZhcy4KCmBgYHtyIFJlc3VtZW4gZGUgY29uanVudG8gZGUgZGF0b3MgbnVtw6lyaWNvc30Kc3VtbWFyeShtb3ZpZXNfbnVtKQpgYGAKCiMjIyBFamVyY2ljaW8gMgoKIyMgRXN0ZSBlamVyY2ljaW8gc2UgcmVhbGl6byBlbiB1bmEgdGFibGEgZW4gZWwgZG9jdW1lbnRvIGRlIGVudHJlZ2EgZGVsIGxhYm9yYXRvcmlvLgoKIyMjIEVqZXJjaWNpbyAzCgpgYGB7ciBub3JtYWxpZGFkIGRlIHZhcmlhYmxlcyBjdWFudGl0YXRpdmFzfQpoaXN0KG1vdmllc19udW0kYnVkZ2V0KQpib3hwbG90KG1vdmllc19udW0kYnVkZ2V0KQpsaWxsaWUudGVzdChtb3ZpZXNfbnVtJGJ1ZGdldCkKYGBgCgpMYSB2YXJpYWJsZSAiYnVkZ2V0IiBubyBzaWd1ZSB1bmEgZGlzdHJpYnVjacOzbiBub3JtYWwuCgpgYGB7cn0KaGlzdChtb3ZpZXNfbnVtJHJldmVudWUpCmJveHBsb3QobW92aWVzX251bSRyZXZlbnVlKQpsaWxsaWUudGVzdChtb3ZpZXNfbnVtJHJldmVudWUpCmBgYAoKYGBge3J9Cmhpc3QobW92aWVzX251bSRydW50aW1lKQpib3hwbG90KG1vdmllc19udW0kcnVudGltZSkKbGlsbGllLnRlc3QobW92aWVzX251bSRydW50
aW1lKQpgYGAKCmBgYHtyfQpoaXN0KG1vdmllc19udW0kZ2VucmVzQW1vdW50KQpib3hwbG90KG1vdmllc19udW0kZ2VucmVzQW1vdW50KQpsaWxsaWUudGVzdChtb3ZpZXNfbnVtJGdlbnJlc0Ftb3VudCkKYGBgCgpgYGB7cn0KaGlzdChtb3ZpZXNfbnVtJHByb2R1Y3Rpb25Db0Ftb3VudCkKYm94cGxvdChtb3ZpZXNfbnVtJHByb2R1Y3Rpb25Db0Ftb3VudCkKbGlsbGllLnRlc3QobW92aWVzX251bSRwcm9kdWN0aW9uQ29BbW91bnQpCmBgYAoKYGBge3J9Cmhpc3QobW92aWVzX251bSRwcm9kdWN0aW9uQ291bnRyaWVzQW1vdW50KQpib3hwbG90KG1vdmllc19udW0kcHJvZHVjdGlvbkNvdW50cmllc0Ftb3VudCkKbGlsbGllLnRlc3QobW92aWVzX251bSRwcm9kdWN0aW9uQ291bnRyaWVzQW1vdW50KQpgYGAKCmBgYHtyfQpoaXN0KG1vdmllc19udW0kYWN0b3JzQW1vdW50KQpib3hwbG90KG1vdmllc19udW0kYWN0b3JzQW1vdW50KQpsaWxsaWUudGVzdChtb3ZpZXNfbnVtJGFjdG9yc0Ftb3VudCkKYGBgCgpgYGB7cn0KaGlzdChtb3ZpZXNfbnVtJGNhc3RXb21lbkFtb3VudCkKYm94cGxvdChtb3ZpZXNfbnVtJGNhc3RXb21lbkFtb3VudCkKbGlsbGllLnRlc3QobW92aWVzX251bSRjYXN0V29tZW5BbW91bnQpCmBgYAoKYGBge3J9Cmhpc3QobW92aWVzX251bSRjYXN0TWVuQW1vdW50KQpib3hwbG90KG1vdmllc19udW0kY2FzdE1lbkFtb3VudCkKbGlsbGllLnRlc3QobW92aWVzX251bSRjYXN0TWVuQW1vdW50KQpgYGAKCmBgYHtyfQpoaXN0KG1vdmllc19udW0kcmVsZWFzZVllYXIpCmJveHBsb3QobW92aWVzX251bSRyZWxlYXNlWWVhcikKbGlsbGllLnRlc3QobW92aWVzX251bSRyZWxlYXNlWWVhcikKYGBgCgpgYGB7cn0KaGlzdChtb3ZpZXNfbnVtJHBvcHVsYXJpdHkpCmJveHBsb3QobW92aWVzX251bSRwb3B1bGFyaXR5KQpsaWxsaWUudGVzdChtb3ZpZXNfbnVtJHBvcHVsYXJpdHkpCmBgYAoKYGBge3J9Cmhpc3QobW92aWVzX251bSR2b3RlQ291bnQpCmJveHBsb3QobW92aWVzX251bSR2b3RlQ291bnQpCmxpbGxpZS50ZXN0KG1vdmllc19udW0kdm90ZUNvdW50KQpgYGAKCmBgYHtyfQpoaXN0KG1vdmllc19udW0kdm90ZUF2ZykKYm94cGxvdChtb3ZpZXNfbnVtJHZvdGVBdmcpCmxpbGxpZS50ZXN0KG1vdmllc19udW0kdm90ZUF2ZykKYGBgCgpgYGB7cn0KdGFibGUobW92aWVzJG9yaWdpbmFsTGFuZ3VhZ2UpCnRhYmxlKG1vdmllcyR2aWRlbykKYGBgCgojIyMgRWplcmNpY2lvIDQuMQoKYGBge3J9CnRvcF9idWRnZXQgPC0gbW92aWVzW29yZGVyKG1vdmllcyRidWRnZXQsIGRlY3JlYXNpbmcgPSBUUlVFKSwgYygib3JpZ2luYWxUaXRsZSIsICJidWRnZXQiKV0KaGVhZCh0b3BfYnVkZ2V0LCAxMCkKYGBgCgojIyMgRWplcmNpY2lvIDQuMgoKYGBge3J9CnRvcF9yZXZlbnVlIDwtIG1vdmllc1tvcmRlcihtb3ZpZXMkcmV2ZW51ZSwgZGVjcmVhc2luZyA9IFRSVUUpLCBjKCJvcmlnaW5hbFRpdGxlIiwgInJldmVudWUiKV0KaGVhZCh0
b3BfcmV2ZW51ZSwgMTApCmBgYAoKIyMjIEVqZXJjaWNpbyA0LjMKCmBgYHtyfQptYXNfdm90b3MgPC0gbW92aWVzW3doaWNoLm1heChtb3ZpZXMkdm90ZUNvdW50KSwgYygib3JpZ2luYWxUaXRsZSIsICJ2b3RlQ291bnQiKV0KcHJpbnQobWFzX3ZvdG9zKQpgYGAKCiMjIyBFamVyY2ljaW8gNC40CgpgYGB7cn0KcGVvcl9wZWxpY3VsYSA8LSBtb3ZpZXNbd2hpY2gubWluKG1vdmllcyR2b3RlQXZnKSwgYygib3JpZ2luYWxUaXRsZSIsICJ2b3RlQXZnIildCnByaW50KHBlb3JfcGVsaWN1bGEpCmBgYAoKIyMjIEVqZXJjaWNpbyA0LjUKCmBgYHtyfQojwr9DdcOhbnRhcyBwZWzDrWN1bGFzIHNlIGhpY2llcm9uIGVuIGNhZGEgYcOxbz8KcGVsaWN1bGFzX3Bvcl9hbmlvIDwtIHRhYmxlKG1vdmllcyRyZWxlYXNlWWVhcikKZGZfcGVsaWN1bGFzX2FuaW8gPC0gYXMuZGF0YS5mcmFtZShwZWxpY3VsYXNfcG9yX2FuaW8pCmNvbG5hbWVzKGRmX3BlbGljdWxhc19hbmlvKSA8LSBjKCJBbmlvIiwgIkNhbnRpZGFkIikKYGBgCgpgYGB7cn0KI8K/RW4gcXXDqSBhw7FvIHNlIGhpY2llcm9uIG3DoXMgcGVsw61jdWxhcz8KYW5pb19tYXNfcHJvZHVjdGl2byA8LSBkZl9wZWxpY3VsYXNfYW5pb1t3aGljaC5tYXgoZGZfcGVsaWN1bGFzX2FuaW8kQ2FudGlkYWQpLCBdCnByaW50KHBhc3RlKCJFbCBhw7FvIGNvbiBtw6FzIHBlbMOtY3VsYXMgZnVlOiIsIGFuaW9fbWFzX3Byb2R1Y3Rpdm8kQW5pbywgImNvbiIsIGFuaW9fbWFzX3Byb2R1Y3Rpdm8kQ2FudGlkYWQsICJwZWzDrWN1bGFzLiIpKQpgYGAKCmBgYHtyfQojR3JhZmljYSBkZSBiYXJyYXMKZGZfcGVsaWN1bGFzX2FuaW8kQW5pb051bSA8LSBhcy5udW1lcmljKGFzLmNoYXJhY3RlcihkZl9wZWxpY3VsYXNfYW5pbyRBbmlvKSkKZ2dwbG90KGRmX3BlbGljdWxhc19hbmlvLCBhZXMoeCA9IEFuaW9OdW0sIHkgPSBDYW50aWRhZCkpICsKICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgZmlsbCA9ICJzdGVlbGJsdWUiKSArCiAgbGFicyh0aXRsZSA9ICJDYW50aWRhZCBkZSBQZWzDrWN1bGFzIHBvciBBw7FvIiwKICAgICAgIHggPSAiQcOxbyBkZSBMYW56YW1pZW50byIsCiAgICAgICB5ID0gIk7Dum1lcm8gZGUgUGVsw61jdWxhcyIpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgojIEVqZXJpY2lvIDQuNgoKYGBge3J9Cgptb3ZpZXMkcmVsZWFzZURhdGUgPC0gYXMuRGF0ZShtb3ZpZXMkcmVsZWFzZURhdGUpCgpwZWxpY3VsYXNfcmVjaWVudGVzIDwtIG1vdmllc1tvcmRlcihtb3ZpZXMkcmVsZWFzZURhdGUsIGRlY3JlYXNpbmcgPSBUUlVFKSwgYygib3JpZ2luYWxUaXRsZSIsICJyZWxlYXNlRGF0ZSIpXQp2MjBfcGVsaWN1bGFzX3JlY2llbnRlcyA8LSBoZWFkKHBlbGljdWxhc19yZWNpZW50ZXMsIDIwKQp2MjBfcGVsaWN1bGFzX3JlY2llbnRlcwpgYGAK